###################################
# ADDS THE AGRICULTURAL DOMINANCE
###################################
# questions to bowydenbraber@gmail.com

library(dplyr)
library(tidyr)
library(forcats)

detach(package:plyr)

#read in dominance data from Javier's PNAS map
ol<-read.csv("union_domcu.csv")

#exclude cus without agr. dominance info for now (later we add info about PAs (and mining) which might not have dominance information)
oldom<-filter(ol,DOMINANCE_>0)
oldom2<-filter(oldom,CU_ID_50M>0)
oldom2$CD_GEOCODI2<-as.factor(oldom2$CD_GEOCODI)

#add raw data from javier - agr census
fdom<-read.csv("fdom.csv")
fdom_red<-fdom[c(2,18:37)]

#combine
olfdom<-merge(x=oldom2,y=fdom_red,by="CD_GEOCODI",all.x=TRUE)

#read in the spatial information and merge it to the dataset
areas<-read.csv("gis_cu00_area.csv")
areas<-areas[c("CU_ID_50M","area_b2")]
olareas<-merge(x=olfdom,y=areas,by="CU_ID_50M",all.x=TRUE)
olareas$perc<-olareas$area_domcu/olareas$area_b2*100

#add categories of dominance
#categories:
#cat1: less than 10 ha
#cat2: 10-50 ha
#cat3: 50-200 ha
#cat4: >200 ha

olareas$cat1<-apply(olareas[c(28:44)],1,function(x) sum(x[1:9]))
olareas$cat2<-apply(olareas[c(28:44)],1,function(x) sum(x[10:11]))
olareas$cat3<-apply(olareas[c(28:44)],1,function(x) sum(x[12:13]))
olareas$cat4<-apply(olareas[c(28:44)],1,function(x) sum(x[14:17]))


olareas_sum<-summarise(group_by(olareas,CU_ID_50M),
                       sumperc=sum(perc/100),
                       cat1=sum(cat1*perc/100),
                       cat2=sum(cat2*perc/100),
                       cat3=sum(cat3*perc/100),
                       cat4=sum(cat4*perc/100)
)

#Dominant actor based on maximum number of properties within each size class
olareas_sum$DOM_actor<-colnames(olareas_sum[c(3:6)])[max.col(olareas_sum[c(3:6)],ties.method="first")]


#part 2: add PA information and create final file

#read census and mapbiomas data
cu_base<-read.csv("CU_BASEDATA.csv")

#add pa increases
for(patype in c("ALL","UC_X","PI","US_X","TI")) cu_base[,paste("p_",patype,"_incr",sep="")] <- cu_base[,paste("p_",patype,"_10",sep="")] - cu_base[,paste("p_",patype,"_00",sep="")]
for(patype in c("UC_X","PI","US_X","TI")) cu_base[,paste("is_",patype,"_incr",sep="")] <- cu_base[,paste("p_",patype,"_incr",sep="")] > 0.1
cu_base$is_NOPA_incr <- cu_base$p_ALL_incr < 0.01

#add pa presences
for(patype in c("ALL","UC_X","PI","US_X","TI")) cu_base[,paste("is_",patype,"_present",sep="")] <- cu_base[,paste("p_",patype,"_00",sep="")] > 0.1
cu_base$is_NOPA <- cu_base$p_ALL_00 < 0.01

#adds the information about mines and PAs (because I would still like to use CTs with missing dominance data, but containing PAs and/or mines)
cu_agri<-merge(cu_base[c("CU_ID_50M","mine_presabs_before2000","mine_presabs_after2000","p_UC_X_incr","p_TI_incr","is_UC_X_present","is_TI_present")],olareas_sum,by="CU_ID_50M",all.x=TRUE)

#filters out CTs without dominance, outside PAs
cu_agri<-filter(cu_agri,mine_presabs_before2000==1 | mine_presabs_after2000==1| p_UC_X_incr>0.1  | p_TI_incr >0.1 | is_UC_X_present==TRUE | is_TI_present==TRUE | sumperc>0.1)

#Solve NA level
cu_agri$DOM_actor<-fct_explicit_na(cu_agri$DOM_actor, "cat0")
levels(cu_agri$DOM_actor)

#merge base data with actor data
cu_base_agri<-merge(cu_base,cu_agri[c(1,8:13)],by="CU_ID_50M")

#add control
cu_base_agri$control<-ifelse(cu_base_agri$is_ALL_present==FALSE & cu_base_agri$is_NOPA_incr==TRUE,1,0)

###add proportion settled

fiscmodha_sum<-read.csv("fiscmodha_sum.csv")
fiscmodha_sum$propsettled<-fiscmodha_sum$fisc_totha/fiscmodha_sum$area_fromareas*100
fiscmodha_sum$propsettled<-round(fiscmodha_sum$propsettled/100,digits=4)
fiscmodha_sum_red<-fiscmodha_sum[,c(2:3,14,26)]
names(fiscmodha_sum_red)[names(fiscmodha_sum_red) == 'areatot'] <- 'fisc_totarea'
cu_base_agri<-merge(cu_base_agri,fiscmodha_sum_red,by="CU_ID_50M",all.x=TRUE)

write.csv(cu_base_agri,file="CU00_COMPLETE.csv")


